

/*************************************************************************/
/*                                                                       */
/*  Copyright (c) 1994 Stanford University                               */
/*                                                                       */
/*  All rights reserved.                                                 */
/*                                                                       */
/*  Permission is given to use, copy, and modify this software for any   */
/*  non-commercial purpose as long as this copyright notice is not       */
/*  removed.  All other uses, including redistribution in whole or in    */
/*  part, are forbidden without prior written permission.                */
/*                                                                       */
/*  This software is provided with absolutely no warranty and no         */
/*  support.                                                             */
/*                                                                       */
/*************************************************************************/

#include <stdio.h>

#define FitsInCache 2048

#define MAX_PROC 1024
#define PAGE_SIZE 4096

typedef struct {
	long n, m, *col, *startrow, *row;
	double *nz;
	} SMatrix;

struct Pair {
	long block_num;
	struct Pair *next;
	};

typedef struct {
        long i, j, owner, remaining, nmod;
	long length; /* number of full rows in block */
	long parent; /* block number of parent block */
	double checksum;
	long *structure, *relative;
	double *nz;
	struct Pair *pair;
      } Block;

typedef union {
        Block *block;
	double nz;
      } Entry;
       
typedef struct {
        long n, *col, *row, n_blocks, n_entries, entries_allocated;
	long *partition_size, *renumbering, *mapI, *mapJ;
	long *domain, *domains, n_domains, *proc_domains;
	long n_partitions, max_partition;
	double **proc_domain_storage;
	Entry *entry;
      } BMatrix;

#define BLOCK(bl_no) (LB.entry[bl_no].block)
#define BLOCKROW(bl_no) (BLOCK(bl_no)->i)
#define BLOCKCOL(bl_no) (BLOCK(bl_no)->j)
#define OWNER(bl_no) (BLOCK(bl_no)->owner)

struct Update {
	long i, j, src, remaining;
	long dimi, dimj, *structi, *structj;
	double *update;
	struct Update *next;
      };

struct Task {
  long block_num, desti, destj, src;
  struct Update *update;
  struct Task *next;
  };

struct GlobalMemory {
	struct { pthread_mutex_t bar_mutex; pthread_cond_t bar_cond; unsigned bar_teller; } start;
	pthread_mutex_t waitLock;
	pthread_mutex_t memLock;
	unsigned long runtime[MAX_PROC];
	};

struct BlockList {
	long theBlock, row, col, length;
	long *structure;
	double *nz;
	struct BlockList *next;
	};

struct LocalCopies {
       double *blktmp;
       long max_panel;
       long *link;
       long *first;
       double *storage;
       double *updatetmp;
       long *relative;
       struct Update *freeUpdate;
       struct Task *freeTask;
       unsigned long rs;
       unsigned long rf;
       unsigned long us;
       unsigned long uf;
       unsigned long ss;
       unsigned long sf;
       unsigned long runtime;
       unsigned long runs;
       };

#define DISTRIBUTED 888

#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))

#define LB_DOMAINS 2

#define EMBED 1

#define NO_PERM 1

#define FAN_OUT 2

/*
 * amal.C
 */
long OpsFromSuper(long size, long nz);
long CountSupers(long cols, long *node);
void Amalgamate2(long join, SMatrix M, long *T, long *nz, long *node, long *domain, long target_size);
void ConsiderMerge(long join, long super, SMatrix M, long *nz, long *node, long *domain, long target_size, long traversal_order);
void JoinTwoSupers2(long *nz, long *node, long child, long parent);
void ReorderMatrix(SMatrix M, long super, long *node, long *counter, long *PERM);
void FixNodeNZAndT(SMatrix M, long *PERM, long *node, long *nz, long *T);
void InvertPerm(long n, long *PERM, long *INVP);
double PathLength(long cols, long rows, long target_panel_size);

/*
 * assign.C
 */
void PDIV(long src_col, long src_nz, double *ops, double *misses, double *runtime);
void PMOD(long src_col, long dest_col, long dest_nz, double *ops, double *misses, double *runtime);
void PADD(long cols, long rows, double *misses, double *runtime);
void AssignBlocksNow(void);
void EmbedBlocks(void);

/*
 * bfac.C
 */
void BFac(long diag, struct LocalCopies *lc);
void OneFac(double *A, long n1, long n2);
void BDiv(long n1, long n3, double *diag_nz, double *below_nz, struct LocalCopies *lc);
void OneDiv(double *A, double *B, long n1, long n3, long n4);
void BMod(long n1, long n2, long n3, double *top_nz, double *bend_nz, double *dest_nz, struct LocalCopies *lc);
void CopyBlock(double *B, double *dest, long n3, long is, long ks, long il, long kl);
void CopyBlockBack(double *B, double *src, long n3, long is, long ks, long il, long kl);
void OneMatmat(double *B, double *A, double *C, long n1, long n2, long n3, long n4, long n5);
void BLMod(long n1, long n2, double *left_nz, double *dest_nz, struct LocalCopies *lc);
void OneLower(double *A, double *C, long n1, long n2, long n3);
void FindBlockUpdate(long domain, long blj, long bli, double **update, long *stride);

/*
 * bksolve.C
 */
double *TriBSolve(BMatrix LB, double *b, long *PERM);
double ComputeNorm(double *x, long n);
double *CreateVector(SMatrix M);

/*
 * block2.C
 */
void CreateBlockedMatrix2(SMatrix M, long block_ub, long *T, long *firstchild, long *child, long *PERM, long *INVP, long *domain, long *partition);
long FindNumPartitions(long set_size, long piece_size);
void ComputeBlockParents(long *T);
void FillInStructure(SMatrix M, long *firstchild, long *child, long *PERM, long *INVP);
void FillInNZ(SMatrix M, long *PERM, long *INVP);
void FindDomStructure(long super, long *nz, long n_nz);
void FindDummyDomainStructure(long which_domain);
void CheckColLength(long col, long n_nz);
void FindBlStructure(SMatrix M, long super, long *PERM, long *INVP, long *firstchild, long *child, long *structure, long *nz);
void FindSuperStructure(SMatrix M, long super, long *PERM, long *INVP, long *firstchild, long *child, long *structure, long *nz, long *n_nz);
void FindDetailedStructure(long col, long *structure, long *nz, long n_nz);
void AllocateNZ(void);
void FillIn(SMatrix M, long col, long *PERM, long *INVP, double *scatter);
void InsSort(long *nz, long n);
long BlDepth(long col);
void SortByKey(long n, long *blocks, long *keys);
void DumpSizes(BMatrix LB, long *domain, long *sizes);
void ComputePartitionNumbering(long *numbering);
void FindMachineDimensions(long P);
long EmbeddedOwner(long block);

/*
 * fo.C
 */
void PreProcessFO(long MyNum);
void PreAllocate1FO(void);
void PreAllocateFO(long MyNum, struct LocalCopies *lc);
void BNumericSolveFO(long MyNum, struct LocalCopies *lc);
void DriveParallelFO(long MyNum, struct LocalCopies *lc);
long HandleTaskFO(long MyNum, struct LocalCopies *lc);
void DiagReceived(long diag, long MyNum, struct LocalCopies *lc);
void BlockReceived(long block, long MyNum, struct LocalCopies *lc);
struct BlockList *CopyOneBlock(long block, long MyNum);
void FreeColumnListFO(long p, long col);
void DecrementRemaining(long dest_block, long MyNum, struct LocalCopies *lc);
void PerformUpdate(struct BlockList *above_bl, struct BlockList *below_bl, long MyNum, struct LocalCopies *lc);
void DistributeUpdateFO(long which_domain, long MyNum, struct LocalCopies *lc);
void HandleUpdate2FO(long which_domain, long bli, long blj, long MyNum, struct LocalCopies *lc);
void FindRelativeIndices(long *src_structure, long src_len, long *dest_structure, long *relative);
void BlockReadyFO(long block, long MyNum, struct LocalCopies *lc);
void BlockDoneFO(long block, long MyNum, struct LocalCopies *lc);
void CheckRemaining(void);
void CheckReceived(void);
void ComputeRemainingFO(void);
void InitRemainingFO(long MyNum);
void ComputeReceivedFO(void);
void InitReceivedFO(long MyNum);
void ScatterUpdateFO(long dimi, long *structi, long dimj, long *structj, long destdim, double *oldupdate, double *newupdate);
void ScatterUpdateFO2(long dimi, long *structi, long dimj, long *structj, long stride, long destdim, double *oldupdate, double *newupdate);

/*
 * malloc.C
 */
void MallocInit(long P);
void InitOneFreeList(long p);
void MallocStats(void);
long FindBucket(long size);
char *MyMalloc(long size, long home);
void MigrateMem(void *start, long length, long home);
void MyFree(void *block);
void MyFreeNow(void *block);

/*
 * mf.C
 */
void InitTaskQueues(long P);
long FindBlock(long i, long j);
void Send(long src_block, long dest_block, long desti, long destj, struct Update *update, long p, long MyNum, struct LocalCopies *lc);
long TaskWaiting(long MyNum);
void GetBlock(long *desti, long *destj, long *src, struct Update **update, long MyNum, struct LocalCopies *lc);

/*
 * numLL.C
 */
void FactorLLDomain(long which_domain, long MyNum, struct LocalCopies *lc);
void CompleteSupernodeB(long super);
void CompleteColumnB(long j);
void FindRelativeIndicesLeft(long *src_structure, long rows_in_update, long offset, long *indices, long *relative);
void ScatterSuperUpdate(double *update, long cols_in_update, long rows_in_update, double *dest_nz, long dest_len, long *relative);
void ModifySuperByColumn(double *src_nz, long cols_in_update, long rows_in_update, double *dest_nz, long dest_len, long *relative);
void SetDestIndices(long super, long *indices);
void SetDomainIndices(long super, long *indices);
void ModifySuperBySuper(long src, long theFirst, long theLast, long length, double *dest);
void ModifyTwoBySupernodeB(long super, long lastcol, long theFirst, double *destination0, double *destination1);
void ModifyBySupernodeB(long super, long lastcol, long theFirst, double *destination);

/*
 * parts.C
 */
void Partition(SMatrix M, long parts, long *T, long *assigned_ops, long *domain, long *domains, long *proc_domains);
void MarkSubtreeAsDomain(long *domain, long root);
void NumberPartition(long parts, long *assigned_ops, long distribute);
long MaxBucket(long *assigned_ops, long parts);
long MinBucket(long *assigned_ops, long parts);
struct Chunk *NewChunk(void);
struct Chunk *GetChunk(void);

/*
 * seg.C
 */
void ComputeTargetBlockSize(SMatrix M, long P);
void FindMaxHeight(SMatrix L, long root, long height, long *maxm);
void NoSegments(SMatrix M);
void CreatePermutation(long n, long *PERM, long permutation_method);

/*
 * solve.C
 */
void Go(void);
void PlaceDomains(long P);
void ComposePerm(long *PERM1, long *PERM2, long n);

/*
 * tree.C
 */
void EliminationTreeFromA(SMatrix A, long *T, long *P, long *INVP);
void ParentToChild(long *T, long n, long *firstchild, long *child);
void ComputeNZ(SMatrix A, long *T, long *nz, long *PERM, long *INVP);
void FindSupernodes(SMatrix A, long *T, long *nz, long *node);
void ComputeWorkTree(SMatrix A, long *nz, double *work_tree);

/*
 * util.C
 */
SMatrix NewMatrix(long n, long m, long nz);
void FreeMatrix(SMatrix M);
double *NewVector(long n);
double Value(long i, long j);
SMatrix ReadSparse(char *name, char *probName);
void DumpLine(FILE *fp);
void ParseIntFormat(char *buf, long *num, long *size);
void ReadVector(FILE *fp, long n, long *where, long perline, long persize);
SMatrix LowerToFull(SMatrix L);
void ISort(SMatrix M, long k);

